Our dataset does not contain data for October, November and December of the year 2022. We are interested in the following main felonies: Burglary, Felony Assault, Grand Larceny, Grand Larceny of Motor Vehicle, Murder and Non-negligent Manslaughter, Rape, and Robbery.
complaint %>%
drop_na(borough, offense) %>%
group_by(borough) %>%
summarize(n_obs = n()) %>%
ggplot(aes(x = reorder(borough, -n_obs), y = n_obs, fill = reorder(borough, -n_obs))) +
geom_bar(stat = 'identity') +
labs(
title = "Frequency of Felonies by Borough (2016-2022)",
x = "Borough",
y = "Frequency"
) +
theme(legend.position = "none")

complaint %>%
drop_na(offense) %>%
group_by(offense) %>%
summarize(n_obs = n()) %>%
mutate(offense = reorder(offense, n_obs)) %>%
ggplot(aes(x = offense, y = n_obs, fill = offense)) +
geom_bar(stat = 'identity') +
labs(
title = "Frequency of Felonies (2016-2022)",
x = "Felony",
y = "Frequency"
) +
coord_flip() +
theme(legend.position = "none")

complaint %>%
filter(year != 2022) %>%
drop_na(offense) %>%
group_by(offense, year) %>%
summarize(n_obs = n()) %>%
ggplot(aes(x = year, y = n_obs, fill = offense)) +
geom_bar(stat = 'identity') +
labs(
title = "Frequency of 7 Felonies by Year (2016-2021)",
x = "Year",
y = "Frequency",
fill = "Felony"
) +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 4,byrow = TRUE))

complaint %>%
drop_na(offense, vic_sex) %>%
group_by(year, vic_sex) %>%
summarize(n_obs = n()) %>%
group_by(year) %>%
summarize(vic_sex, percentage = n_obs/sum(n_obs)) %>%
ggplot(aes(x = year, y = percentage, fill = vic_sex)) +
geom_bar(stat = 'identity') +
labs(
x = "Year",
y = "Proportion",
title = "Proportions of Felonies by Victim Sex and Year",
fill = "Victim Sex"
)

complaint %>%
drop_na(offense, vic_age_group) %>%
group_by(year, vic_age_group) %>%
summarize(n_obs = n()) %>%
group_by(year) %>%
summarize(vic_age_group, percentage = n_obs/sum(n_obs)) %>%
ggplot(aes(x = year, y = percentage, fill = vic_age_group)) +
geom_bar(stat = 'identity') +
labs(
x = "Year",
y = "Proportion",
title = "Proportions of Felonies by Victim Age Group and Year",
fill = "Victim Age Group"
)

complaint %>%
drop_na(offense, vic_race) %>%
group_by(year, vic_race) %>%
summarize(n_obs = n()) %>%
group_by(year) %>%
summarize(vic_race, percentage = n_obs/sum(n_obs)) %>%
ggplot(aes(x = year, y = percentage, fill = vic_race)) +
geom_bar(stat = 'identity') +
labs(
x = "Year",
y = "Proportion",
title = "Proportions of Felonies by Victim Race and Year",
fill = "Victim Race"
)

num_days = function(month, year) {
year = as.integer(year)
months = 1:12
names(months) = month.abb
month = months[month]
as.numeric(strftime(as.Date(paste(year + month %/% 12, month %% 12 + 1, "01", sep = "-")) - 1, "%d"))
}
complaint %>%
drop_na(offense) %>%
mutate(year = fct_rev(year)) %>%
group_by(year, month) %>%
summarize(mean_freq = n() / num_days(month, year)) %>%
plot_ly(
x = ~month, y = ~year, z = ~mean_freq,
type = "heatmap"
) %>%
colorbar(x = 1, y = 1) %>%
layout(
title = "Average Daily Felony Frequency by Year and Month",
xaxis = list(title = "Month"),
yaxis = list(title = "Year")
)